package sis.ra.yahoor.result;

import sis.ra.utility.Utils;

/**
 * Reads a dump of search-result snippets, and for every snippet replaces the
 * current record's product name (and alias, if any) with the marker
 * {@code PRODUCT} and its company name with the marker {@code COMPANY}.
 *
 * <p>Input layout, as consumed by {@link #main(String[])}:
 * <ul>
 *   <li>a record starts with a line beginning with the result-directory path;</li>
 *   <li>the next line has the form
 *       {@code COMPANYPRODUCT: <company>\t<product>[|<alias>]};</li>
 *   <li>every other non-empty line holds snippets separated by
 *       {@code <b>...</b>}.</li>
 * </ul>
 *
 * <p>Each processed snippet is echoed to standard output and appended to the
 * output file, prefixed with {@code ==>} when it contains both markers and
 * with {@code ++>} otherwise. The number of {@code ==>} snippets is printed
 * at the end.
 */
public class getSentenceWithCompanyProduct {

	/** Directory that holds the input dump and receives the output file. */
	private static final String DATASET_DIR = "C:\\Documents and Settings\\I820753\\Desktop\\dataset\\";

	/** Snippet dump that is read. */
	private static final String INPUT_FILE = DATASET_DIR + "result1006.all";

	/** File that the annotated snippets are appended to. */
	private static final String OUTPUT_FILE = DATASET_DIR + "result1006.snippetCP";

	/** A line starting with this prefix opens a new company/product record. */
	private static final String RECORD_HEADER_PREFIX = DATASET_DIR + "yahoo result_1006";

	/** Tag in front of the company name on the line that follows a record header. */
	private static final String COMPANY_PRODUCT_TAG = "COMPANYPRODUCT: ";

	/** Regex for the literal {@code <b>...</b>} that separates snippets on one line. */
	private static final String SNIPPET_SEPARATOR = "<b>\\.\\.\\.</b>";

	/**
	 * Runs the extraction over the hard-coded input file.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) {
		String content=Utils.readFile(INPUT_FILE);
		if (content==null)
		{
			// NOTE(review): whether Utils.readFile can return null is not visible here; this is purely defensive.
			System.err.println("Could not read "+INPUT_FILE);
			return;
		}
		// Accept both \n and \r\n so a stray '\r' never ends up inside a name or in the output.
		String [] lines=content.split("\r?\n");
		int count=0;
		String companyname="";
		String productname="";
		String productalias="";
		// Lower-cased search keys of the current record, computed once per record
		// instead of once per snippet.
		String companykey="";
		String productkey="";
		String aliaskey="";
		for (int i=0;i<lines.length;i++)
		{
			String line=lines[i];
			if (line.length()<1) continue;
			if (line.startsWith(RECORD_HEADER_PREFIX))
			{
				System.out.println(line);
				if (i+1>=lines.length)
				{
					// Header is the very last line: there is no COMPANYPRODUCT line to parse.
					System.err.println("Record header without a following COMPANYPRODUCT line: "+line);
					Utils.writeToFile(OUTPUT_FILE, line,true,true);
					break;
				}
				String detail=lines[i+1];
				System.out.println(detail);
				String [] fields=detail.split("\t");
				// split() drops trailing empty fields, so both indexes have to be checked.
				companyname=fields.length>0?fields[0].replace(COMPANY_PRODUCT_TAG, ""):"";
				productname=fields.length>1?fields[1]:"";
				// Reset for every record; otherwise a record without an alias would
				// inherit the alias of the previous record.
				productalias="";

				if (productname.contains("|"))
				{
					String [] pronames=productname.split("\\|");
					productname=pronames.length>0?pronames[0]:"";
					productalias=pronames.length>1?pronames[1]:"";
					System.out.println("=========="+productalias+"========"+productname);
				}
				System.out.println(companyname+"  "+productname);
				Utils.writeToFile(OUTPUT_FILE, line,true,true);
				Utils.writeToFile(OUTPUT_FILE, detail,true, true);

				// Names get the same punctuation stripping that is applied to the bold
				// segments of a snippet; the alias is only lower-cased.
				productkey=Utils.removePunctuation(productname).toLowerCase();
				companykey=Utils.removePunctuation(companyname).toLowerCase();
				aliaskey=productalias.toLowerCase();
				// The COMPANYPRODUCT line has been consumed together with the header.
				i=i+1;
			}else
			{
				String [] snipps=line.split(SNIPPET_SEPARATOR);
				for (int j=0;j<snipps.length;j++)
				{
					String trimmed=snipps[j].trim();
					if (trimmed.length()<2) continue;
					String cursnip=normalizeBoldSegments(mergeAdjacentBold(trimmed));
					// Same order as before: product, then company, then alias.
					cursnip=replaceIfPresent(cursnip, productkey, "PRODUCT");
					cursnip=replaceIfPresent(cursnip, companykey, "COMPANY");
					cursnip=replaceIfPresent(cursnip, aliaskey, "PRODUCT");
					if (cursnip.contains("PRODUCT")&&cursnip.contains("COMPANY"))
					{
						System.out.println("==>"+cursnip);
						Utils.writeToFile(OUTPUT_FILE,"==>"+cursnip,true,true);
						count++;
					}
					else
					{
						System.out.println("++>"+cursnip);
						Utils.writeToFile(OUTPUT_FILE,"++>"+cursnip,true,true);
					}
				}
			}
		}
		System.out.println(count);
	}

	/** Joins bold runs that are separated only by a space, "! ", "-" or nothing into a single run. */
	private static String mergeAdjacentBold(String snippet) {
		return snippet.replace("</b> <b>", " ").replace("</b><b>", "").replace("</b>! <b>", "! ").replace("</b>-<b>", "-");
	}

	/**
	 * Passes the text of every {@code <b>...</b>} segment through
	 * {@code Utils.removePunctuation} and lower-cases it; text outside the
	 * segments is left alone. Example of the intent:
	 * {@code <b>Amazon.com</b>} loses its ".".
	 */
	private static String normalizeBoldSegments(String snippet) {
		String [] ss=snippet.split("<b>");
		if (ss.length==0) return "";	// the snippet consisted of "<b>" tags only
		StringBuilder result=new StringBuilder(ss[0]);
		for (int k=1;k<ss.length;k++)
		{
			// Limit 2 always yields at least one element (an empty "<b></b>" no longer
			// fails) and keeps everything after the first "</b>" instead of dropping it
			// when the chunk contains more than one closing tag.
			String [] parts=ss[k].split("</b>", 2);
			result.append("<b>").append(Utils.removePunctuation(parts[0]).toLowerCase()).append("</b>");
			if (parts.length==2) result.append(parts[1]);
		}
		return result.toString();
	}

	/**
	 * Replaces every occurrence of {@code key} in {@code text} with
	 * {@code marker}. An empty key is skipped, because
	 * {@code String.replace("", marker)} would insert the marker between all
	 * characters of the text.
	 */
	private static String replaceIfPresent(String text, String key, String marker) {
		return key.length()>0?text.replace(key, marker):text;
	}
}
